From 2d49de48f01d151bb58e01e1a120090965a90e50 Mon Sep 17 00:00:00 2001 From: "akw27@boulderdash.cl.cam.ac.uk" Date: Wed, 29 Jan 2003 11:17:07 +0000 Subject: [PATCH] bitkeeper revision 1.15.3.2 (3e37b8332YRktwAjVLsh2PyFFW2XNw) RX data is now moved in a domain-memory page, but still copied at the end. --- xen-2.4.16/drivers/net/tulip/interrupt.c | 5 +- xen-2.4.16/include/asm-i386/pci.h | 14 ++- xen-2.4.16/include/xeno/skbuff.h | 21 +++-- xen-2.4.16/net/dev.c | 44 ++++++++++ xen-2.4.16/net/eth.c | 70 ++++++++------- xen-2.4.16/net/skbuff.c | 105 ++++++++++++++++++++++- 6 files changed, 220 insertions(+), 39 deletions(-) diff --git a/xen-2.4.16/drivers/net/tulip/interrupt.c b/xen-2.4.16/drivers/net/tulip/interrupt.c index c92b12ea92..8e88f2f457 100644 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c @@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *dev) #endif /* Check if the packet is long enough to accept without copying to a minimally-sized skbuff. */ - if (pkt_len < tulip_rx_copybreak - && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { + //if (pkt_len < tulip_rx_copybreak + // && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { + if (0) { skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single(tp->pdev, diff --git a/xen-2.4.16/include/asm-i386/pci.h b/xen-2.4.16/include/asm-i386/pci.h index 9ab9c282fe..43fab42762 100644 --- a/xen-2.4.16/include/asm-i386/pci.h +++ b/xen-2.4.16/include/asm-i386/pci.h @@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, if (direction == PCI_DMA_NONE) BUG(); flush_write_buffers(); - return virt_to_bus(ptr); + + if ((unsigned long) ptr > PAGE_OFFSET) + return virt_to_bus(ptr); + + /* If an address that is not in hypervisor VM is passed to this + * function (ie <= PAGE_OFFSET) we assume that the passer knows + * what they are doing, and have passed a physical address that + * should not be converted here. 
This is a little hackish, but + * is being added to allow references to domain memory in order + * to support zero-copy network code. + */ + + return (dma_addr_t) ptr; } /* Unmap a single streaming mode DMA translation. The dma_addr and size diff --git a/xen-2.4.16/include/xeno/skbuff.h b/xen-2.4.16/include/xeno/skbuff.h index f9c38c12bf..dd6257b370 100644 --- a/xen-2.4.16/include/xeno/skbuff.h +++ b/xen-2.4.16/include/xeno/skbuff.h @@ -34,6 +34,10 @@ #define VIF_DROP -3 #define VIF_ANY_INTERFACE -4 +//skb_type values: +#define SKB_NORMAL 0 +#define SKB_ZERO_COPY 1 + #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define SLAB_SKB /* Slabified skbuffs */ @@ -187,7 +191,7 @@ struct sk_buff { unsigned int data_len; unsigned int csum; /* Checksum */ unsigned char __unused, /* Dead field, may be reused */ - cloned, /* head may be cloned (check refcnt to be sure). */ + cloned, /* head may be cloned (check refcnt to be sure) */ pkt_type, /* Packet class */ ip_summed; /* Driver fed us an IP checksum */ __u32 priority; /* Packet queueing priority */ @@ -203,8 +207,12 @@ struct sk_buff { void (*destructor)(struct sk_buff *); /* Destruct function */ - int src_vif; /* vif we came from */ - int dst_vif; /* vif we are bound for */ + unsigned int skb_type; /* SKB_NORMAL or SKB_ZERO_COPY */ + struct pfn_info *pf; /* record of physical pf address for freeing */ + int src_vif; /* vif we came from */ + int dst_vif; /* vif we are bound for */ + struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. 
*/ + @@ -244,6 +252,7 @@ struct sk_buff { extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); @@ -259,7 +268,8 @@ extern void skb_over_panic(struct sk_buff *skb, int len, void *here); extern void skb_under_panic(struct sk_buff *skb, int len, void *here); /* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) +//#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) +#define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo)) /** * skb_queue_empty - check if a queue is empty @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, { struct sk_buff *skb; - skb = alloc_skb(length+16, gfp_mask); + //skb = alloc_skb(length+16, gfp_mask); + skb = alloc_zc_skb(length+16, gfp_mask); if (skb) skb_reserve(skb,16); return skb; diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c index 9d02fb3f28..749d50aa3f 100644 --- a/xen-2.4.16/net/dev.c +++ b/xen-2.4.16/net/dev.c @@ -30,6 +30,7 @@ #include #include +#include #define BUG_TRAP ASSERT #define notifier_call_chain(_a,_b,_c) ((void)0) @@ -695,6 +696,21 @@ int netif_rx(struct sk_buff *skb) if (skb->stamp.tv_sec == 0) get_fast_time(&skb->stamp); + /* Attempt to handle zero-copy packets here: */ + if (skb->skb_type == SKB_ZERO_COPY) + { + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); + + /* remapping this address really screws up all the skb pointers. We need + * to map them all here sufficiently to get the packet demultiplexed. + */ + + skb->data = skb->head; + skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this. 
+ skb->mac.raw = skb->data; + skb->data += ETH_HLEN; + } + /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. */ @@ -747,10 +763,18 @@ drop: netdev_rx_stat[this_cpu].dropped++; local_irq_restore(flags); + if (skb->skb_type == SKB_ZERO_COPY) + unmap_domain_mem(skb->head); + kfree_skb(skb); return NET_RX_DROP; found: + if (skb->skb_type == SKB_ZERO_COPY) { + unmap_domain_mem(skb->head); + //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT); + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; + } hyp_event_notify(cpu_mask); local_irq_restore(flags); return 0; @@ -930,8 +954,28 @@ void flush_rx_queue(void) rx = shadow_ring->rx_ring+i; if ( (skb->len + ETH_HLEN) < rx->size ) rx->size = skb->len + ETH_HLEN; + + /* remap the packet again. This is very temporary and will shortly be + * replaced with a page swizzle. + */ + + if (skb->skb_type == SKB_ZERO_COPY) + { + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); + skb->data = skb->head; + skb_reserve(skb,16); + skb->mac.raw = skb->data; + skb->data += ETH_HLEN; + } + copy_to_user((void *)rx->addr, skb->mac.raw, rx->size); copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx)); + + if (skb->skb_type == SKB_ZERO_COPY) + { + unmap_domain_mem(skb->head); + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; + } } net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1); if ( net_ring->rx_cons == net_ring->rx_event ) diff --git a/xen-2.4.16/net/eth.c b/xen-2.4.16/net/eth.c index d982eef39b..5238de022e 100644 --- a/xen-2.4.16/net/eth.c +++ b/xen-2.4.16/net/eth.c @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw=skb->data; - skb_pull(skb,dev->hard_header_len); - eth= skb->mac.ethernet; + if (skb->skb_type == SKB_ZERO_COPY) + { + skb_pull(skb,dev->hard_header_len); + skb->mac.raw= (void *)0xdeadbeef; + return htons(ETH_P_802_2); + + } else { // 
SKB_NORMAL + + skb->mac.raw=skb->data; + skb_pull(skb,dev->hard_header_len); + eth= skb->mac.ethernet; - if(*eth->h_dest&1) - { - if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) + if(*eth->h_dest&1) + { + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) skb->pkt_type=PACKET_BROADCAST; else skb->pkt_type=PACKET_MULTICAST; - } + } - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. - */ + /* + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. + * + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. + */ - else if(1 /*dev->flags&IFF_PROMISC*/) - { + else if(1 /*dev->flags&IFF_PROMISC*/) + { if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) skb->pkt_type=PACKET_OTHERHOST; - } + } - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - rawp = skb->data; + rawp = skb->data; - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. - */ - if (*(unsigned short *)rawp == 0xFFFF) + /* + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. 
+ */ + if (*(unsigned short *)rawp == 0xFFFF) return htons(ETH_P_802_3); - /* - * Real 802.2 LLC - */ - return htons(ETH_P_802_2); + /* + * Real 802.2 LLC + */ + return htons(ETH_P_802_2); + } } + int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) { struct ethhdr *eth = skb->mac.ethernet; diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c index 07896fda78..abd2c73ed1 100644 --- a/xen-2.4.16/net/skbuff.c +++ b/xen-2.4.16/net/skbuff.c @@ -149,6 +149,102 @@ static __inline__ void skb_head_to_pool(struct sk_buff *skb) kmem_cache_free(skbuff_head_cache, skb); } +static inline u8 *alloc_skb_data_page(struct sk_buff *skb) +{ + struct list_head *list_ptr; + struct pfn_info *pf; + unsigned long flags; + + spin_lock_irqsave(&free_list_lock, flags); + + if (!free_pfns) { /* no free pages: drop free_list_lock and restore IRQs before bailing out, otherwise the lock stays held */ spin_unlock_irqrestore(&free_list_lock, flags); return NULL; } + + list_ptr = free_list.next; + pf = list_entry(list_ptr, struct pfn_info, list); + pf->flags = 0; // owned by dom0 + list_del(&pf->list); + pf->next = pf->prev = (pf - frame_table); + free_pfns--; + + spin_unlock_irqrestore(&free_list_lock, flags); + + skb->pf = pf; + return (u8 *)((pf - frame_table) << PAGE_SHIFT); +} + +static inline void dealloc_skb_data_page(struct sk_buff *skb) +{ + struct pfn_info *pf; + unsigned long flags; + + pf = skb->pf; + + spin_lock_irqsave(&free_list_lock, flags); + + list_add_tail(&pf->list, &free_list); + free_pfns++; + + spin_unlock_irqrestore(&free_list_lock, flags); +} + +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + static int count = 0; + if (++count < 5) { + printk(KERN_ERR "alloc_skb called nonatomically " + "from interrupt %p\n", NET_CALLER(size)); + BUG(); + } + gfp_mask &= ~__GFP_WAIT; + } + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). 
*/ + size = SKB_DATA_ALIGN(size); + data = alloc_skb_data_page(skb); + if (data == NULL) + goto nodata; + + /* XXX: does not include slab overhead */ + skb->truesize = size + sizeof(struct sk_buff); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->cloned = 0; + skb->data_len = 0; + skb->src_vif = VIF_UNKNOWN_INTERFACE; + skb->dst_vif = VIF_UNKNOWN_INTERFACE; + skb->skb_type = SKB_ZERO_COPY; + + atomic_set(&skb->users, 1); + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + return skb; + +nodata: + skb_head_to_pool(skb); +nohead: + return NULL; +} + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the @@ -213,6 +309,7 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) skb->data_len = 0; skb->src_vif = VIF_UNKNOWN_INTERFACE; skb->dst_vif = VIF_UNKNOWN_INTERFACE; + skb->skb_type = SKB_NORMAL; atomic_set(&skb->users, 1); atomic_set(&(skb_shinfo(skb)->dataref), 1); @@ -295,7 +392,13 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->frag_list) skb_drop_fraglist(skb); - kfree(skb->head); + if (skb->skb_type == SKB_NORMAL) { + kfree(skb->head); + } else if (skb->skb_type == SKB_ZERO_COPY) { + dealloc_skb_data_page(skb); + } else { + printk("skb_release_data called with unknown skb type!\n"); + } } } -- 2.30.2